#!/usr/bin/env python
##Created by David Langelaan 2008
##a program for catenating .pdb files from the max clust program output.

class Treenode(object):
    """One merge step of the clustering tree, printable as a nested tree string.

    name     -- label of this node (stored as a string)
    branch1  -- first child: another Treenode, or a structure label
    branch2  -- second child: another Treenode, or a structure label
    RMSD     -- value attached to this node; kept exactly as given
                (CreateNexusTree passes it as a string)

    Leaf labels are translated through the module-level ``structurekey``
    list, whose entries are [structure label, pdb file name].
    """

    def __init__(self, name, branch1, branch2, RMSD=0):
        self.name = str(name)
        self.branches = [branch1, branch2]
        self.nexus = ''
        self.RMSD = RMSD

    def _leaflabel(self, branch):
        """Return the label printed for a leaf branch.

        The label is the integer found in characters 8-10 of the pdb file
        name of the first matching ``structurekey`` entry (this fits names
        such as 'Sortout_012.pdb').  A branch with no entry in
        ``structurekey`` is printed under its own label so that the tree
        string stays well formed.
        """
        label = str(branch)
        for key in structurekey:
            if label == key[0]:
                return str(int(key[1][8:11]))
        return label

    def _branchtext(self, branch):
        """Return 'subtree:length' for one child of this node."""
        # isinstance instead of comparing type() against a throwaway instance:
        # no dummy object is built, and other classes are never mistaken for
        # a Treenode.
        if isinstance(branch, Treenode):
            length = float(self.RMSD) - float(branch.RMSD)
            return branch.printnode() + ':' + str(length)
        # str() so that a numeric RMSD (such as the default 0) also works.
        return self._leaflabel(branch) + ':' + str(self.RMSD)

    def printnode(self):
        """Return the tree string '(branch1:len,branch2:len)name' for this node.

        A child Treenode is printed recursively and its length is this
        node's RMSD minus the child's RMSD; a leaf gets this node's RMSD as
        its length.  The string is rebuilt from scratch on every call (and
        also stored in ``self.nexus``), so calling the method repeatedly
        always gives the same result.
        """
        first, second = self.branches
        self.nexus = ('(' + self._branchtext(first) + ','
                      + self._branchtext(second) + ')' + self.name)
        return self.nexus

class Cluster(object):
    """A named cluster: its structure labels and the pdb files they map to.

    The methods read these module-level settings: ``inputdirectory``,
    ``outputfiledescriptor``, ``pdbdescriptor`` and ``structurekey``.
    """

    def __init__(self, name):
        self.name = str(name)
        self.pdbs = []      # pdb file names, filled in by getpdbs()
        self.structs = []   # structure labels belonging to this cluster

    def getpdbs(self):
        """Append to self.pdbs the pdb file of every structure in self.structs.

        Every ``structurekey`` entry whose label equals a structure label
        contributes its file name; results are appended, not replaced.
        """
        for struct in self.structs:
            for structure in structurekey:
                if struct == structure[0]:
                    self.pdbs.append(structure[1])

    def writecatpdb(self, Rvalue=''):
        """Write all pdb files of the cluster into one multi-model pdb file.

        Each input file becomes one MODEL ... TER / ENDMDL section and the
        models are numbered from 1 in the order of self.pdbs.  END lines of
        the inputs are dropped (trailing whitespace is ignored when
        recognising them) and a single END closes the output, so every model
        is terminated even when an input has no END line.
        """
        target = inputdirectory + outputfiledescriptor + self.name + Rvalue + '.pdb'
        with open(target, 'w') as out:
            for number, pdb in enumerate(self.pdbs, 1):
                out.write('MODEL        ' + str(number) + '\n')
                with open(inputdirectory + pdb) as source:
                    for line in source:
                        if line.rstrip() == 'END':
                            continue
                        # Guard against a last line without a newline so the
                        # following TER record starts on its own line.
                        if not line.endswith('\n'):
                            line += '\n'
                        out.write(line)
                out.write('TER\nENDMDL\n')
            out.write('END\n')

    def writepdblist(self, Rvalue=''):
        """Write the pdb file names of this cluster, one per line."""
        target = (inputdirectory + outputfiledescriptor + self.name
                  + Rvalue + 'pdblist.txt')
        with open(target, 'w') as out:
            # self.pdbs, not the pdbs of whatever global 'cluster' exists.
            for pdb in self.pdbs:
                out.write(pdb + '\n')

    def createnewpdbdir(self, Rvalue=''):
        """Copy the cluster's pdb files into their own directory.

        The directory is created when missing; the copies are named
        ``pdbdescriptor`` + a three-digit running number + '.pdb'.
        """
        outdir = inputdirectory + outputfiledescriptor + self.name + Rvalue + '/'
        # Only an already existing directory is tolerated; any other mkdir
        # failure (permissions, missing parent) is reported to the caller.
        if not os.path.isdir(outdir):
            os.mkdir(outdir)
        for i, pdb in enumerate(self.pdbs, 1):
            with open(inputdirectory + pdb) as source:
                contents = source.read()
            with open(outdir + pdbdescriptor + '%03d.pdb' % i, 'w') as out:
                out.write(contents)
            
                        
#Gets the structure numbers present in a given cluster
def getstructnumbers(index):
    """Return the structure labels listed for the cluster that starts at lines[index].

    Reads the module-level ``lines`` list.  Everything after the first
    'ARE' token on line ``index`` is collected, followed by every token of
    the subsequent lines up to (not including) the first line whose first
    token is 'FINAL'.

    Blank lines inside the listing are skipped, and the labels gathered so
    far are returned if the input ends before a 'FINAL' line.  An empty
    list is returned when line ``index`` has no 'ARE' token.
    """
    tokens = lines[index].split()
    if 'ARE' not in tokens:
        return []
    numbers = tokens[tokens.index('ARE') + 1:]
    for text in lines[index + 1:]:
        tokens = text.split()
        if tokens and tokens[0] == 'FINAL':
            break
        numbers.extend(tokens)
    return numbers

def readinputfile(inputfile):
    """Return all lines of ``inputfile``, located in ``inputdirectory``, as a list.

    Line endings are kept.  The file is closed even if reading fails.
    """
    with open(inputdirectory + inputfile) as f:
        return f.readlines()

    
    #Determining which structure corresponds to which PDB file
def makestructurekey(lines):
    """Build the list that maps structure labels to file names.

    For every line whose first token is 'STRUCTURE' and which contains the
    token 'IS', a pair [second token of that line, first token of the next
    line] is recorded.  Lines for which the next line is missing or blank
    are skipped.
    """
    pairs = []
    for position, text in enumerate(lines):
        tokens = text.split()
        if not tokens or tokens[0] != 'STRUCTURE' or 'IS' not in tokens:
            continue
        try:
            following = lines[position + 1].split()
        except IndexError:
            # The STRUCTURE line is the last line of the input.
            continue
        if following:
            pairs.append([tokens[1], following[0]])
    return pairs

##Creates the cluster objects with pdb files.
def makeclusters():
    """Return one populated Cluster per entry of ``clusterstoparse``.

    For each cluster number the module-level ``lines`` are searched for the
    'STRUCTURES IN CLUSTER  <n> ARE' header; the structure labels found
    there are stored on the Cluster (a later matching header overrides an
    earlier one) and then resolved to pdb files via Cluster.getpdbs().
    """
    built = []
    for number in clusterstoparse:
        label = str(number)
        current = Cluster(label)
        built.append(current)
        header = 'STRUCTURES IN CLUSTER  ' + label + ' ARE'
        for position, text in enumerate(lines):
            if header in text:
                current.structs = getstructnumbers(position)
        current.getpdbs()
    return built

##Parses the data file and puts the tree into NEXUS format
def CreateNexusTree(lines, rvalue):
    """Build the list of Treenode objects described by the data file.

    lines  -- lines of the data file
    rvalue -- 'R1', 'R2' or 'R4'; selects where the RMSD is read from

    Every line containing 'FORMED BY ROTATING CLUSTER' yields one node: its
    name is token 1 and its two branches are token 6 and token 9 (without
    its last character).  A branch that names a node created earlier is
    linked to that node, otherwise the label itself is kept as a leaf.  The
    RMSD is the last token for R1/R2 and token 13 (without its last
    character) for R4.

    Returns the nodes in file order.  Raises ValueError when a matching
    line is met and ``rvalue`` is not one of the supported types.
    """
    Nodelist = []
    known = {}  # node name -> most recently created node with that name
    for line in lines:
        if 'FORMED BY ROTATING CLUSTER' not in line:
            continue
        tokens = line.split()
        first = tokens[6]
        second = tokens[9][:-1]
        if rvalue in ('R1', 'R2'):
            RMSD = tokens[-1]
        elif rvalue == 'R4':
            RMSD = tokens[13][:-1]
        else:
            raise ValueError('unsupported rvalue: %r' % (rvalue,))
        node = Treenode(tokens[1], known.get(first, first),
                        known.get(second, second), RMSD)
        Nodelist.append(node)
        known[node.name] = node
    return Nodelist

#prints off the Tree in Nexus Format
def Printnode(Node, rvalue):
    """Write the tree rooted at ``Node`` as one 'Tree <rvalue>= ...;' line.

    The line goes to the module-level ``treeout`` file.  ``Node`` only needs
    a printnode() method returning the tree string.
    """
    # file.write instead of the 'print >>' form: same output, and it also
    # works when the script is run under Python 3.
    treeout.write('     Tree ' + rvalue + '= ' + Node.printnode() + ';' + '\n')


###########################
###################
#Stuff to change in the program, plus the main loop itself. 

import os

# Settings to adapt for each run.  The functions and classes above read
# these module-level names directly, so they must stay at module level.
inputdirectory = '/private/var/automount/mnt/david/apelin_17/Apelin_5C_H2O_conformera/round37fixed_analysis/top80struct/range_13-17/'
Typestoparse = ['R1', 'R2', 'R4']  # the types of output files made by the clusterpose program
clusterstoparse = [150, 151, 152, 153, 154, 155, 156, 157, 158]  # the specific cluster numbers you would like to parse
# File descriptors; change depending on how you named your files / how you
# want them named.
outputfiledescriptor = 'Cluster'
inputfiledescriptor = 'Cluster'
inputtailing = '_13-17.txt'
treeoutputfile = 'Rtrees.tre'
pdbdescriptor = 'Sortout_'

# Main loop: do the calculations for all of the R values listed above.
treeout = open(inputdirectory + treeoutputfile, 'w')
try:
    treeout.write('#NEXUS\n')
    treeout.write('begin trees;\n')
    for rvalue in Typestoparse:
        inputfile = inputfiledescriptor + rvalue + inputtailing
        lines = readinputfile(inputfile)
        structurekey = makestructurekey(lines)
        Clusters = makeclusters()
        for cluster in Clusters:
            cluster.writecatpdb(rvalue)
            cluster.writepdblist(rvalue)
            cluster.createnewpdbdir(rvalue)
        Nodelist = CreateNexusTree(lines, rvalue)
        # The last node created is the root of the tree.
        Printnode(Nodelist[-1], rvalue)
    treeout.write('END;\n')
finally:
    # Close the tree file even when one of the steps above fails.
    treeout.close()
print('Finished, now get to work')

        
